Code
import geopandas as gpd
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import altair as alt
import foliumimport geopandas as gpd
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import altair as alt
import folium
The following webpage takes a deep dive into the Aid Worker Security Database, which documents violence against aid workers across the globe. The dataset describes violence against aid workers from different organizations by attack type and context, country, organization, whether the workers were national or international, and, most importantly for this page, gender.
The following analysis aims to guide you through the distribution of attacks across the globe, and to demonstrate that female aid workers are in significantly more danger than their male counterparts.
Let's first take a look at the different variables included in the dataset. The dashboards below show the geographic distribution of these variables.
idea: a few sample descriptions to give context
# Step 1: read the incident records; every aggregate below is computed per country.
df = pd.read_csv("data/security_incidents.csv")
incidents_grouped = df.groupby('Country')

# Per-country sums of the "Total nationals" / "Total internationals" columns,
# plus the number of rows recorded for each country.
nationals = incidents_grouped['Total nationals'].sum().reset_index(name="total_nationals")
internationals = incidents_grouped['Total internationals'].sum().reset_index(name="total_internationals")
incidents_by_country = incidents_grouped.size().reset_index(name='incident_count')

# Country geometries, read from the shapefile with GeoPandas.
geo_data = gpd.read_file('ne_110m_admin_0_countries/ne_110m_admin_0_countries.shp')

# Left-join each per-country table onto the shapes (ADMIN holds the country name),
# so countries without a matching row are kept with NaN totals.
for per_country in (nationals, internationals, incidents_by_country):
    geo_data = geo_data.merge(per_country, left_on="ADMIN", right_on="Country", how="left")
# Plot a random sample of geolocated incidents as markers on an interactive map.
geolocated = df.dropna(subset=["Latitude", "Longitude"])
# DataFrame.sample raises ValueError when asked for more rows than exist
# (without replacement), so cap the sample size at what is available.
df_clean_location = geolocated.sample(min(200, len(geolocated)))
m = folium.Map(location=[14.07092855, 103.099916], zoom_start=5)
for _, row in df_clean_location.iterrows():
    # An empty "Details" cell is read by pandas as NaN (a float); show a
    # placeholder instead of passing a non-string into the popup/tooltip.
    details = row["Details"]
    description = "No description available" if pd.isna(details) else str(details)
    # One marker per sampled incident: popup on click, tooltip on hover.
    folium.Marker(
        location=[row["Latitude"], row["Longitude"]],
        popup=folium.Popup(description, max_width=300),
        tooltip=f"{row['Country']}<br> Description: {description}",
    ).add_to(m)
# Last expression of the cell: renders the map in the notebook.
m
# Choropleth with a dropdown that switches between nationals affected,
# internationals affected, and incident counts per country.
# Each entry: (trace/button label, geo_data column, colorscale, colorbar/button title)
choropleth_layers = [
    ("Nationals", "total_nationals", "RdYlGn_r", "Nationals Affected"),
    ("Internationals", "total_internationals", "YlOrRd", "Internationals Affected"),
    ("Incidents", "incident_count", "BuGn", "Incidents Count"),
]

fig = go.Figure()
for position, (label, column, colorscale, title) in enumerate(choropleth_layers):
    fig.add_trace(go.Choropleth(
        z=geo_data[column],
        hoverinfo='location+z',
        locations=geo_data['ADMIN'],
        locationmode='country names',
        colorscale=colorscale,
        colorbar_title=title,
        showscale=True,
        name=label,
        # Only the first layer starts visible. Otherwise all three maps and
        # their colorbars are drawn on top of each other until a dropdown
        # entry is picked, while the dropdown already shows "Nationals".
        visible=(position == 0),
    ))

# One dropdown button per layer: show exactly that trace and retitle the figure.
dropdown_buttons = [
    {
        'args': [
            {'visible': [other == position for other in range(len(choropleth_layers))]},
            {'title': title},
        ],
        'label': label,
        'method': 'update',
    }
    for position, (label, _column, _colorscale, title) in enumerate(choropleth_layers)
]

# Update layout to add dropdown and other configurations
fig.update_layout(
    geo=dict(
        showcoastlines=True,
        coastlinecolor="Black",
        projection_type="natural earth",
    ),
    updatemenus=[
        {
            'buttons': dropdown_buttons,
            'direction': 'down',
            'showactive': True,
            'x': 0,
            'xanchor': 'left',
            'y': 1,
            'yanchor': 'top',
        }
    ],
    title="Incidents by Country",
)
fig.write_html("choropleth_map_with_dropdown.html")
from IPython.display import IFrame
# Display the saved file directly in Jupyter Notebook
IFrame("choropleth_map_with_dropdown.html", width=800, height=600)
We can see from above that mainly nationals are targeted, and that a majority of incidents take place across Northern Africa and the Middle East. The predominance of nationals among those affected is most likely due to the distribution of national vs. international aid workers stationed across the countries, with national aid workers being present in much higher numbers.
# Choropleth of incident counts per country with one dropdown entry per means of attack.
# NaN is dropped so every entry is a string: the column-name construction below
# calls str methods on it, and groupby ignores NaN keys anyway.
unique_values = df['Means of attack'].dropna().unique()
means_of_attack = df.groupby(['Country', 'Means of attack']).size().unstack(fill_value=0)
geo_data = gpd.read_file('ne_110m_admin_0_countries/ne_110m_admin_0_countries.shp')

# Means of attack -> geo_data column name (spaces to underscores, lowercased, "_count" suffix).
count_columns = {
    incident: f'{incident.replace(" ", "_").lower()}_count'
    for incident in unique_values
}

for incident, count_column in count_columns.items():
    if incident in means_of_attack.columns:
        # Left-join the per-country counts for this means of attack onto the shapes.
        geo_data = geo_data.merge(
            means_of_attack[incident].rename(count_column),
            left_on="ADMIN", right_index=True, how="left",
        )
    else:
        # Value never appears with a country (e.g. only on rows whose Country is missing).
        geo_data[count_column] = 0

fig = go.Figure()
# Add one choropleth trace per means of attack
for position, (incident, count_column) in enumerate(count_columns.items()):
    counts = geo_data[count_column]
    # Each country's share (in percent) of all matched incidents of this type.
    geo_data[f'{incident}_percentage'] = (counts / counts.sum()) * 100
    fig.add_trace(go.Choropleth(
        z=counts,
        # "text" is included so the percentage computed above actually shows on hover.
        hoverinfo='location+z+text',
        locations=geo_data['ADMIN'],  # Country names are taken from the 'ADMIN' field
        locationmode='country names',
        colorscale='Viridis',
        colorbar_title=f"{incident} Count",
        showscale=True,
        name=incident,
        text=geo_data[f'{incident}_percentage'].apply(lambda x: f"{x:.2f}%"),
        # Only the first trace starts visible; otherwise every trace and colorbar
        # is drawn stacked until a dropdown entry is selected.
        visible=(position == 0),
    ))

# One dropdown button per means of attack: show exactly that trace and retitle the figure.
dropdown_buttons = [
    {
        'args': [
            {'visible': [other == position for other in range(len(count_columns))]},
            {'title': f'{incident} Count'},
        ],
        'label': incident,
        'method': 'update',
    }
    for position, incident in enumerate(count_columns)
]

# Update layout to add dropdown and other configurations
fig.update_layout(
    geo=dict(
        showcoastlines=True,
        coastlinecolor="Black",
        projection_type="natural earth",
    ),
    updatemenus=[
        {
            'buttons': dropdown_buttons,
            'direction': 'down',
            'showactive': True,
            'x': 0,
            'xanchor': 'left',
            'y': 1,
            'yanchor': 'top',
        }
    ],
    title="Incidents by Country",
)
fig.write_html("choropleth_map_with_dropdown2.html")
from IPython.display import IFrame
# Display the saved file directly in Jupyter Notebook
IFrame("choropleth_map_with_dropdown2.html", width=800, height=600)
The map above shows the means of attack across all countries that have incidents. Aid organizations often struggle with reporting (as this is not their first priority), so many of the attacks are of unknown means.
# Choropleth of per-country totals for each organization type, switchable via dropdown.
# List of the categories for the dropdown (these are also the dataset column names)
columns = ['UN', 'INGO', 'ICRC', 'NRCS and IFRC', 'NNGO', 'Other']
# Dataset column -> geo_data field holding its per-country total.
total_fields = {
    column: 'total_nrcs_ifrc' if column == 'NRCS and IFRC' else f'total_{column.lower()}'
    for column in columns
}

# One aggregation and one index-based merge: avoids accumulating several
# "Country" key columns (and the renames needed to keep them from colliding).
org_totals = df.groupby('Country')[columns].sum().rename(columns=total_fields)
geo_data = gpd.read_file('ne_110m_admin_0_countries/ne_110m_admin_0_countries.shp')
geo_data = geo_data.merge(org_totals, left_on="ADMIN", right_index=True, how="left")

fig = go.Figure()
# Add choropleth for each column (UN, INGO, ICRC, etc.)
for position, column in enumerate(columns):
    field_name = total_fields[column]
    totals = geo_data[field_name]
    # Each country's share (in percent) of the matched total for this category.
    geo_data[f'{field_name}_percentage'] = (totals / totals.sum()) * 100
    fig.add_trace(go.Choropleth(
        z=totals,  # Value for the choropleth (total number)
        hoverinfo='location+z+text',  # Add text info to hover
        locations=geo_data['ADMIN'],  # Country names are taken from the 'ADMIN' field
        locationmode='country names',
        colorscale='Viridis',
        colorbar_title=f"{column} Count",
        showscale=True,
        name=column,
        text=geo_data[f'{field_name}_percentage'].apply(lambda x: f"{x:.2f}%"),  # Percentage in hover text
        # Only the first trace starts visible; otherwise every trace and colorbar
        # is drawn stacked until a dropdown entry is selected.
        visible=(position == 0),
    ))

# One dropdown button per category: show exactly that trace and retitle the figure.
dropdown_buttons = [
    {
        'args': [
            {'visible': [other == position for other in range(len(columns))]},
            {'title': f'{column} Count'},
        ],
        'label': column,
        'method': 'update',
    }
    for position, column in enumerate(columns)
]

# Update layout to add dropdown and other configurations
fig.update_layout(
    geo=dict(
        showcoastlines=True,
        coastlinecolor="Black",
        projection_type="natural earth",
    ),
    updatemenus=[
        {
            'buttons': dropdown_buttons,
            'direction': 'down',
            'showactive': True,
            'x': 0,
            'xanchor': 'left',
            'y': 1,
            'yanchor': 'top',
        }
    ],
    title="Counts by Country",
)
# Save the figure to its own HTML file. The previous name,
# "choropleth_map_with_dropdown.html", is already written by the
# nationals/internationals/incidents map and would be overwritten here.
fig.write_html("choropleth_map_with_dropdown_orgs.html")
from IPython.display import IFrame
# Display the saved file directly in Jupyter Notebook
IFrame("choropleth_map_with_dropdown_orgs.html", width=800, height=600)
# NOTE: despite its name, `females` holds the per-country "Gender Male" totals (column "male_total").
females = df.groupby('Country')['Gender Male'].sum().reset_index(name="male_total")
# Choropleth of per-country totals from the "Gender Male" / "Gender Female"
# columns, switchable via dropdown.
# Both totals are aggregated together under names that match their content.
gender_totals = (
    df.groupby('Country')[['Gender Male', 'Gender Female']]
    .sum()
    .rename(columns={'Gender Male': 'male_total', 'Gender Female': 'female_total'})
)
# Country geometries, read from the shapefile with GeoPandas
geo_data = gpd.read_file('ne_110m_admin_0_countries/ne_110m_admin_0_countries.shp')
# Left-join the totals onto the shapes (ADMIN holds the country name)
geo_data = geo_data.merge(gender_totals, left_on="ADMIN", right_index=True, how="left")

# Each entry: (trace/button label, geo_data column, colorscale, colorbar title, figure title)
gender_layers = [
    ("Males", "male_total", "RdYlGn_r", "Total Males", "Males Affected"),
    ("Females", "female_total", "YlOrRd", "Total Females", "Females Affected"),
]

fig = go.Figure()
for position, (label, column, colorscale, colorbar_title, _title) in enumerate(gender_layers):
    fig.add_trace(go.Choropleth(
        z=geo_data[column],
        hoverinfo='location+z',
        locations=geo_data['ADMIN'],
        locationmode='country names',
        colorscale=colorscale,
        colorbar_title=colorbar_title,
        showscale=True,
        name=label,
        # Only the first trace starts visible; otherwise both maps and
        # colorbars are drawn stacked until a dropdown entry is selected.
        visible=(position == 0),
    ))

# One dropdown button per trace. The visibility list has exactly one entry per
# trace (two here), not the three entries left over from the three-trace figure.
dropdown_buttons = [
    {
        'args': [
            {'visible': [other == position for other in range(len(gender_layers))]},
            {'title': title},
        ],
        'label': label,
        'method': 'update',
    }
    for position, (label, _column, _colorscale, _colorbar_title, title) in enumerate(gender_layers)
]

# Update layout to add dropdown and other configurations
fig.update_layout(
    geo=dict(
        showcoastlines=True,
        coastlinecolor="Black",
        projection_type="natural earth",
    ),
    updatemenus=[
        {
            'buttons': dropdown_buttons,
            'direction': 'down',
            'showactive': True,
            'x': 0,
            'xanchor': 'left',
            'y': 1,
            'yanchor': 'top',
        }
    ],
    title="Gender Distribution",
)
fig.write_html("choropleth_map_with_dropdown3.html")
from IPython.display import IFrame
# Display the saved file directly in Jupyter Notebook
IFrame("choropleth_map_with_dropdown3.html", width=800, height=600)
import matplotlib.pyplot as plt
# Monthly distribution of the "Gender Female" / "Gender Male" totals, each
# expressed as a share of that gender's overall total.
# .copy() makes df_clean an independent frame, so adding the 'Date' column
# below does not trigger pandas' SettingWithCopyWarning.
df_clean = df.dropna().copy()
# First day of each (Year, Month) as a timestamp.
df_clean['Date'] = pd.to_datetime(df_clean['Year'].astype(str) + '-' + df_clean['Month'].astype(int).astype(str) + '-01', format='%Y-%m-%d')
df_clean.head()
male = df_clean.groupby('Month')['Gender Male'].sum().reset_index(name="total_male")
female = df_clean.groupby('Month')['Gender Female'].sum().reset_index(name="total_female")

# Scaled to percent so the plotted values match the "Percentage" y-axis label
# (the raw ratio is a 0-1 fraction).
female_share = (female["total_female"] / female["total_female"].sum()) * 100
male_share = (male["total_male"] / male["total_male"].sum()) * 100

# Side-by-side bars per month (offset by +/-0.2), with a line overlay per gender.
plt.bar(female["Month"] - 0.2, female_share, width=0.4, label="Female", color='blue', align='center', alpha=.5)  # Female bars
plt.bar(male["Month"] + 0.2, male_share, width=0.4, label="Male", color='orange', align='center', alpha=0.5)  # Male bars
plt.plot(female["Month"], female_share, color='blue', marker='o', linestyle='-', linewidth=2)
plt.plot(male["Month"], male_share, color='orange', marker='o', linestyle='-', linewidth=2)
plt.xlabel('Month')
plt.ylabel('Percentage Of All Incidents')
plt.title('Female vs Male Total by Month')
plt.legend()
plt.show()
/var/folders/8n/rfnmqbtd0tvdljpw_xbt64jh0000gn/T/ipykernel_86281/2091133605.py:4: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
# Distribution of the "Gender Female" / "Gender Male" totals by Location and by
# Motive, each expressed as a share of that gender's overall total.
# .copy() makes df_clean an independent frame, so adding the 'Date' column
# below does not trigger pandas' SettingWithCopyWarning.
df_clean = df.dropna().copy()
# First day of each (Year, Month) as a timestamp.
df_clean['Date'] = pd.to_datetime(df_clean['Year'].astype(str) + '-' + df_clean['Month'].astype(int).astype(str) + '-01', format='%Y-%m-%d')
df_clean.head()
import matplotlib.pyplot as plt

# Create a figure with 2 subplots (2 facets): Location on the left, Motive on the right
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6))
for ax, category in ((ax1, 'Location'), (ax2, 'Motive')):
    # Female and male totals per category value, aggregated together so both
    # series share the same index and bar positions.
    totals = df_clean.groupby(category)[['Gender Female', 'Gender Male']].sum()
    # Scaled to percent so the plotted values match the "Percentage" y-axis
    # label (the raw ratio is a 0-1 fraction).
    shares = (totals / totals.sum()) * 100
    positions = np.arange(len(shares))  # One slot per category value

    # Side-by-side bars: female shifted left, male shifted right of each slot.
    ax.bar(positions - 0.2, shares['Gender Female'], width=0.4, label="Female", color='blue', align='center')
    ax.bar(positions + 0.2, shares['Gender Male'], width=0.4, label="Male", color='orange', align='center')
    ax.set_xlabel(category)
    ax.set_ylabel('Percentage Of All Incidents')
    ax.set_title(f'Female vs Male Total by {category}')
    ax.set_xticks(positions)
    ax.set_xticklabels(shares.index, rotation=45)
    ax.legend()

# Adjust layout to make sure everything fits
plt.tight_layout()
# Show the plot
plt.show()
/var/folders/8n/rfnmqbtd0tvdljpw_xbt64jh0000gn/T/ipykernel_86281/2983598639.py:2: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
ideas:
dropdown by location / motive over time for each gender